##########################
##########################
###Main Mediation Model###
##########################
##########################
library(mediation)
library(stargazer)
library(MatchIt)
library(foreign)
library(car)
library(lmtest)

# set working folder. 
setwd("~/OneDrive - Indiana University/FromGoogle/syd/Koren2025FinalCode/")
# read dataset
full.2003.2018 <- read.csv("modmeddataset7225.csv")

# Set RNG
set.seed(123)

#Load p value function
source("mediatiopval.R")
#Load plotting function
source("medplots.R")

#Prepare dat for analysis
# subset na rm data
med.dat <- na.omit(full.2003.2018[,c("gid","clear_zoon_confirm","lagclear_zoon_confirm","Forest.coverage_bin","ag_crops","fires.cell.count_anom","NDVI_mean_anom","log.nl","logged.pop","month")])
#Create regular dummies for the mediate package
unique_month <- unique(med.dat$month)
#run loop 
for (month in unique_month) {
  new_col_name <- paste0("f_month", month)  # Create a column name
  med.dat[[new_col_name]] <- ifelse(med.dat$month == month, 1, 0)  # Assign 1 if matched, else 0
}


#############
##Ag. areas##
#############
# subset only ag areas
med.dat.ag <- subset(med.dat, (ag_crops>0))

#Run base model
med.fit1 <- lm(NDVI_mean_anom ~ fires.cell.count_anom+
                 lagclear_zoon_confirm+log.nl +logged.pop+
                 f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+
                 f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.ag)
summary(med.fit1)
#CSEs
med.cse1 <- coeftest(med.fit1, vcov. = vcovCL(med.fit1, cluster = med.dat.ag$gid, type = "HC0"))

#Run fit model
out.fit1 <- lm(clear_zoon_confirm ~ NDVI_mean_anom+fires.cell.count_anom+
                 lagclear_zoon_confirm+log.nl + logged.pop+
                 f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+
                 f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.ag)
summary(out.fit1)
#CSEs
out.cse1 <- coeftest(out.fit1, vcov. = vcovCL(out.fit1, cluster = med.dat.ag$gid, type = "HC0"))

#Set seed
set.seed(123)
#Create a loop for simulations, accommodating memory limits
sim_results <- list()
for (i in 1:10) {  # Split 1000 sims into 10 runs of 100 each
  sim_results[[i]] <- mediate(med.fit1, out.fit1, 
                              treat = "fires.cell.count_anom", 
                              mediator = "NDVI_mean_anom",
                              sims = 100,   cluster = med.dat.ag$gid)
  gc()  # Garbage collection after each iteration
}

#Extract average p values
pvals_ag <- med_pvals(sim_results)

################
##Forest areas##
################
#Subset only forest data
med.dat.for <- subset(med.dat, (Forest.coverage_bin>0))

#Run base model
med.fit2 <- lm(NDVI_mean_anom ~ fires.cell.count_anom+
                 lagclear_zoon_confirm+log.nl + logged.pop+
                 f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+
                 f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.for)
summary(med.fit2)
#CSEs
med.cse2 <- coeftest(med.fit2, vcov. = vcovCL(med.fit2, cluster = med.dat.for$gid, type = "HC0"))

#Run fit model
out.fit2 <- lm(clear_zoon_confirm ~ NDVI_mean_anom+fires.cell.count_anom+
                 lagclear_zoon_confirm+log.nl + logged.pop+
                 f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+
                 f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.for)
summary(out.fit2)
#CSEs
out.cse2 <- coeftest(out.fit2, vcov. = vcovCL(out.fit2, cluster = med.dat.for$gid, type = "HC0"))

#Set seed
set.seed(123)
#Create a loop for simulations, accommodating memory limits
sim_results2 <- list()
for (i in 1:10) {  # Split 1000 sims into 10 runs of 100 each
  sim_results2[[i]] <- mediate(med.fit2, out.fit2, 
                               treat = "fires.cell.count_anom", 
                               mediator = "NDVI_mean_anom",
                               sims = 100,   cluster = med.dat.for$gid)
  gc()  # Garbage collection after each iteration
}

#Extract average p values
pvals_for <- med_pvals(sim_results2)

######################
### All other areas###
######################
#Subset all other areas 
med.dat.other <- subset(med.dat, (Forest.coverage_bin==0|ag_crops==0))

#Run base model
med.fit3 <- lm(NDVI_mean_anom ~ fires.cell.count_anom+log.nl + logged.pop+
                 lagclear_zoon_confirm+
                 f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+
                 f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.other)
summary(med.fit3)
#CSEs
med.cse3 <- coeftest(med.fit3, vcov. = vcovCL(med.fit3, cluster = med.dat.other$gid, type = "HC0"))

#Run fit model
out.fit3 <- lm(clear_zoon_confirm ~ NDVI_mean_anom+fires.cell.count_anom+
                 lagclear_zoon_confirm+log.nl + logged.pop+
                 f_month2+f_month3+f_month4+f_month5+f_month6+f_month7+
                 f_month8+f_month9+f_month10+f_month11+f_month12, data=med.dat.other)
summary(out.fit3)
#CSEs
out.cse3 <- coeftest(out.fit3, vcov. = vcovCL(out.fit3, cluster = med.dat.other$gid, type = "HC0"))


#Set seed
set.seed(123)
#Create a loop for simulations, accommodating memory limits
sim_results3 <- list()
for (i in 1:20) {  # Split 1000 sims into 20 runs of 50 each
  sim_results3[[i]] <- mediate(med.fit3, out.fit3, 
                               treat = "fires.cell.count_anom", 
                               mediator = "NDVI_mean_anom",
                               sims = 50,   cluster = med.dat.other$gid)
  gc()  # Garbage collection after each iteration
}

#Extract average p values
pvals_oth <- med_pvals(sim_results3)

#Combine all average p-values
pvals <- cbind(pvals_ag, pvals_for, pvals_oth)
pvals

##Combine all estimates together
sim_results_list <- list(sim_results, sim_results2, sim_results3)

#Generate plots -- model name "main"
med_plots(model.name="main", sim_results_list=sim_results_list)

##Export mediation models
# Open a file connection
sink("tableMain.txt")

# --- First: Export full regression tables (to LaTeX or text) ---
stargazer(med.cse1, out.cse1, med.cse2, out.cse2, med.cse3, out.cse3,
          type = "text", 
          star.cutoffs = c(0.05, 0.01), 
          style = "default")

cat("\n\n% --- Goodness-of-fit stats only ---\n\n")

# --- Second: Export only N, R², and Adjusted R² ---
stargazer(med.fit1, out.fit1, med.fit2, out.fit2, med.fit3, out.fit3,
          type = "text", 
          keep.stat = c("n", "rsq", "adj.rsq"),
          omit = ".*")

# Close the file
sink()

####Coefficient equliaty tests
###Vegetation anommalies
##Model 1 and 2
linearHypothesis(out.fit1, "NDVI_mean_anom = 0.0001226242")
##Model 1 and 3
linearHypothesis(out.fit1, "NDVI_mean_anom = 0.00003805028")
#Model 2 and 3
linearHypothesis(out.fit2, "NDVI_mean_anom = 0.00003805028")

###Fire anommalies
##Model 1 and 2
linearHypothesis(med.fit1, "fires.cell.count_anom = -0.4045350")
##Model 1 and 3
linearHypothesis(med.fit1, "fires.cell.count_anom = -0.3952012")
#Model 2 and 3
linearHypothesis(med.fit2, "fires.cell.count_anom = -0.3952012")